# This file contains the R code used to implement the estimators and functionals in "A new test of independence", JMVA (2017).
# As a fully working example, the code herein can reproduce Figure 4b of the paper.
# The same functions (with different input data) can be used to reproduce the other figures of the paper.
# More general use of the estimators is also feasible by supplying the corresponding data set. Use of the functions is
# self-explanatory and is further facilitated by the comments at the preamble of each function.
# Bn and Dn are correct and use cut-off points according to the literature:
#  - Dn uses the Harrell (2001) Hmisc package computation and cut-off points.
#  - Bn uses the empirical cut-off points of Mudholkar and Wilding (2003, The Statistician (JRSS D)).
# Tn is not the max version but uses one bandwidth instead (not the max statistic as it results from many bandwidths).
# The bandwidth used is obtained by the lokern package and corresponds to the global (MISE-optimal) bandwidth rule for
# fixed-design kernel regression (according to Prakash's email we are focusing on fixed regressors, hence is.rand=FALSE for glkerns).
# N.B.: Yu and Jones (JASA) use the kernel regression optimal bandwidth as a starting point to further refine and obtain
# the quantile regression optimal bandwidth - this is not implemented yet.
# Moreover: the present routine is specific to sample size = 50 for the Tn statistic, i.e. the empirical quantile and
# subsequent calculations use n = 50.

options(echo=FALSE)
rm(list=ls())

library("Hmisc")
library("lokern")
library("nor1mix")
library("kSamples")
library("coin")
library("copula")

fSqArgSwitch<-function(x, dist, p1=0, p2=1)
{
  switch(dist,
         gaussian    = (dnorm(x, p1, p2))^2,
         exponential = (dexp(x, p1))^2)
}

fArgSwitch<-function(x, dist, p1=0, p2=1)
{
  switch(dist,
         gaussian    = dnorm(x, p1, p2),
         exponential = dexp(x, p1))
}

fRKswitch<-function(kernel)
{
  switch(kernel,
         rectangular = 0.5,
         gaussian    = 1/(2*sqrt(pi)))
}

fConvolution<-function(kernel)
{
  switch(kernel,
         rectangular = 0.09375,
         gaussian    = 1/(2*sqrt(2*pi)) # convolution of the standard normal density 3 times with itself, evaluated at 0
  )
}
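# A minimal sketch (not part of the original simulation) of the bandwidth rule
# described in the preamble: glkerns() from the lokern package returns the
# global plug-in bandwidth for fixed-design kernel regression when
# is.rand=FALSE. xs/ys below are hypothetical; set the guard to TRUE to run.
if (FALSE) {
  xs<-sort(runif(50))
  ys<-sin(2*pi*xs) + rnorm(50, 0, 0.2)
  hband<-glkerns(xs, ys, is.rand=FALSE)$bandwidth # global (MISE-optimal) bandwidth
  cat("plug-in bandwidth:", hband, "\n")
}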
# Rchatfxhat: estimate of the R(Delta_R) functional entering the plug-in bandwidth hopt() below
Rchatfxhat<-function(xin, yin, h, n, DensityEst)
{
  # p<-c(1:100)/100
  # integrand<-sapply(p, function(p, xin, yin, h, N) (pcf.kern.quan(xin,yin,h,N=n, p)$value)^2 * DensityEst, xin, yin, h, N)
  integrand<-function(p) { qnorm(p) }
  chat<-integrate(integrand, 0.00006, 1)$value # mean(integrand)
  mean(chat * 0.2820948) # (1/6)*sqrt(3)/pi
  chat
}

# hopt: proposed plug-in bandwidth; in the simulations below the glkerns()
# bandwidth is used instead (see the commented alternatives there)
hopt<-function(xin, yin, dist, kernel, p1, p2)
{
  n<-length(xin)
  h<-bw.nrd(xin) # bw.SJ(xin)
  DensityEst<-kde(xin, xin, h, gaussian)
  Rhatfx<-mean(DensityEst)
  RhatfxUse<-Rhatfx^{3/2}
  Intfx<-mean(DensityEst^2) # estimate \int f_X^3(x)\,dx by E f_X^2(X) = \frac{1}{n}\sum_{i=1}^n f_X^2(X_i)
  # for the normal distribution the exact value is (1/6)*sqrt(3)/pi; check against the estimate
  RK<-1/(2*sqrt(pi)) # fRKswitch(kernel); R(K)=\int_{-\infty}^{+\infty} K^2(x)\,dx, computation done in Maple; currently supports gaussian, rectangular
  RKuse<-RK^{3/2}
  ConvK<-1/(2*sqrt(2*pi)) # fConvolution(kernel)
  term1<-(sqrt(2)*ConvK*Intfx)/(3*RKuse*RhatfxUse)
  term1use<-term1^{-1/2}
  SigmaSq<-16/45 * RK * Rhatfx
  Nu2<-Intfx # mean(fSqArgSwitch(xin, dist, p1, p2)) # ~ \frac{1}{n}\sum_{i=1}^n f_X(X_i)
  RDeltaR<-Rchatfxhat(xin, yin, h, n, DensityEst)
  term2<-(n*RDeltaR)/(SigmaSq*sqrt(2*Nu2*RK))
  term2use<-term2^{-3/2}
  banduse<-term1use * term2use # proposed bandwidth: for comparison, the method used so far returns bandwidths in the range of 0.4 to 1.7
  banduse
}

kgaussian<-function(x)
{
  result<-0.3989 * exp(-0.5*x*x)
  result
}

"gaussian"<-function(x) { dnorm(x) }

# kde: kernel density estimate of xin evaluated at the points in xout
"kde"<-function(xin, xout, h, kfun)
{
  n<-length(xin)
  arg1<-(sapply(xout, "-", xin))/h
  arg2<-kfun(arg1)
  arg3<-colSums(arg2)
  arg3/(n*h)
}

int1<-function(x) { dnorm(x)^2 }
F1<-integrate(int1, lower = -Inf, upper = Inf)$value
int2<-function(x) { dnorm(x)^2 }
F2<-integrate(int2, lower = -Inf, upper = Inf)$value

# T.n2: the Tn test statistic for a single bandwidth h (the kernel argument is
# kept for interface compatibility; the gaussian kernel is hard-coded)
T.n2<-function(xi, yi, kernel, h)
{
  n<-length(xi)
  arg1<-(sapply(xi, "-", xi))/h
  arg2<-kgaussian(arg1) # K{(x_i-x_j)/h}
  arg3<-rank(yi)
  arg3.1<-sapply(arg3, "pmin", arg3)
  arg3.2<-sapply(arg3, "pmax", arg3)
  arg3.1<-(arg3.1^2)/(n+1)^2
  arg3.2<-((n-arg3.2)^2)/(n+1)^2 - 1/3
  arg4<-arg3.1 + arg3.2 # (min(Rni,Rnj)^2 + (n-max(Rni,Rnj))^2)/(n+1)^2 - 1/3
  arg5<-arg2 * arg4
  arg5[lower.tri(arg5, diag=T)]<-0 # zero out the lower triangle and diagonal, keeping only the pairs with i < j
  sum(arg5)/(n*(n-1)*h) # the normalising constant is immaterial: Tn is compared with its own simulated quantile, so any fixed positive scaling gives identical rejection decisions
}

# B.n: quadrant-count statistic Bn (empirical cut-off points from Mudholkar and Wilding, 2003)
B.n<-function(xi, yi)
{
  n<-length(xi)
  n1n2n3n4<-sapply(1:n, function(i, xi, yi) {
    xiuse<-xi[i]
    yiuse<-yi[i]
    n1<-length(which(xi<=xiuse & yi<=yiuse))
    n2<-length(which(xi>xiuse & yi<=yiuse))
    n3<-length(which(xi<=xiuse & yi>yiuse))
    n4<-length(which(xi>xiuse & yi>yiuse))
    # cat("\n", n1+n2+n3+n4)
    c(n1, n2, n3, n4)
  }, xi, yi)
  nMat<-matrix(n1n2n3n4, nrow=n, ncol=4, byrow=T)
  # nMat<-t(n1n2n3n4)
  sum((nMat[,1]*nMat[,4]-nMat[,2]*nMat[,3])^2)/n^4
}

# D.n: Hoeffding's D statistic
D.n<-function(xi, yi)
{
  n<-length(xi)
  abc<-sapply(1:n, function(i, xi, yi) {
    xiuse<-xi[i]
    yiuse<-yi[i]
    ause<-xiuse-xi > 0 # xiuse = X_a, xi = X_b - for all b
    ause1<-replace(ause, F, 0)
    ause<-sum(ause1)-1
    buse<-yiuse-yi > 0 # yiuse = Y_a, yi = Y_b - for all b
    buse1<-replace(buse, F, 0)
    buse<-sum(buse1)-1
    cuse<-sum(ause1 * buse1)-1
    c(ause, buse, cuse)
  }, xi, yi)
  abcMat<-t(abc)
  A1<-as.double(abcMat[,1])
  B1<-as.double(abcMat[,2])
  C1<-as.double(abcMat[,3])
  A<-sum(A1*(A1-1)*B1*(B1-1))
  B<-sum((A1-1)*(B1-1)*C1)
  C<-sum((C1-1)*C1)
  n<-as.double(n)
  (A-2*(n-2)*B+(n-2)*(n-3)*C)/(n*(n-1)*(n-2)*(n-3)*(n-4))
}
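# A minimal usage sketch on hypothetical data (not part of the original
# simulation): the three statistics defined above, with the T.n2 bandwidth
# taken from glkerns() as described in the preamble. Set the guard to TRUE to run.
if (FALSE) {
  xs<-rnorm(50)
  ys<-rnorm(50)
  hband<-glkerns(xs, ys, is.rand=FALSE)$bandwidth
  T.n2(xs, ys, "kgaussian", hband) # kernel-weighted rank statistic Tn
  B.n(xs, ys)                      # quadrant-count statistic Bn
  D.n(xs, ys)                      # Hoeffding's D; compare with hoeffd(xs, ys)$D[1,2]
}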
# PowerGraphs1: simulates the Tn statistic under (near-)independence - a Frank
# copula with parameter 0.01 - and returns the empirical 'cutoff' quantile of
# the 10000 simulated values, i.e. the critical value of the Tn test
PowerGraphs1<-function(SampleSize, cutoff, hfactor, F1)
{
  correl<-0
  correluse<-0
  PowerMat<-matrix(0, nrow=10000, ncol=1)
  CorMat<-matrix(0, nrow=10000, ncol=1)
  for(j in 1:10000)
  {
    cl2<-frankCopula(0.01, dim = 2) # param in (0.01, 25)
    v<-rCopula(SampleSize, cl2)
    x1<-v[,1]
    x2<-v[,2]
    # testcor<-abs(cor(x1, x2))
    # while(testcor>cutoff)
    # {
    #   x1<-rnorm(SampleSize)
    #   x2<-rnorm(SampleSize)
    #   testcor<-abs(cor(x1, x2))
    # }
    x1<-x1 # generate independent samples # rnorm(SampleSize, 0, 1) # AllSamples[,2*j-1]
    y1<-x2 # rnorm(SampleSize, 0, 1) # AllSamples[,2*j]
    ro<-correluse/100 # correlation
    z1<-ro*x1+sqrt(1-ro^2)*y1 # create the sample dependent on x1 (here ro=0, so z1 is independent of x1)
    band2<-hfactor*glkerns(x1, z1, is.rand=FALSE)$bandwidth
    # band2<-hfactor*hopt(x1, y1, "gaussian", "gaussian", 0, 1)
    # t1<-T.n2(x1, z1, "kgaussian", band1)
    t2<-T.n2(x1, z1, "kgaussian", band2)
    CorMat[j,]<-cor(x1, z1)
    # PowerMat[j,1]<-t1 # c(b1, d1, t1) # store the test statistic values
    PowerMat[j,1]<-t2
  }
  # cTn1<-PowerMat[,1]
  cTn2<-PowerMat[,1]
  # Out1<-quantile(cTn1, cutoff)
  Out2<-quantile(cTn2, cutoff)
  # c(Out1, Out2)
  Out2
}
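# Illustrative call (not part of the original simulation): the empirical 95%
# critical value of Tn for n = 50 and bandwidth factor 1.5; the settings are
# hypothetical. Set the guard to TRUE to run.
if (FALSE) {
  q95<-PowerGraphs1(50, 0.95, 1.5, F1)
}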
# PowerGraphs: estimates the power of all competing tests over a grid of Frank
# copula parameters; returns one row per parameter value
PowerGraphs<-function(SampleSize=100, iterations, hfactor, F1, F2, quse, BnCutoff, Dna.a)
{
  duse<-indepTestSim(SampleSize, 2, 2, 1000, verbose = FALSE)
  correl<-seq(0.01, 50, length=iterations) # range of Frank copula parameters (dependence strengths) to examine
  TBDmatrix<-matrix(0, nrow=iterations, ncol=8) # initialize the matrix which will contain the powers: first column the mean correlation, then one column per test
  for(i in 1:iterations)
  {
    correluse<-correl[i]
    PowerMat<-matrix(0, nrow=100, ncol=7)
    CorMat<-matrix(0, nrow=100, ncol=1)
    for(j in 1:100)
    {
      # x1<-rnorMix(SampleSize, MW.nm11)
      # x2<-rnorMix(SampleSize, MW.nm11)
      cl2<-frankCopula(correluse, dim = 2) # param in (0.01, 25)
      v<-rCopula(SampleSize, cl2)
      x1<-v[,1]
      z1<-v[,2]
      # testcor<-abs(cor(x1, x2))
      # while(testcor>cutoff)
      # {
      #   x1<-rnorm(SampleSize)
      #   x2<-rnorm(SampleSize)
      #   testcor<-abs(cor(x1, x2))
      # }
      # x1<-x1
      # y1<-x2
      ro<-correluse/100 # correlation
      # z1<-ro*x1+sqrt(1-ro^2)*y1 # create the sample dependent on x1
      b1<-B.n(x1, z1)
      d1<-hoeffd(x1, z1)$P[1,2] # D.n(x1,z1) # Hoeffding's D p-value (Hmisc)
      e1<-cor.test(x1, z1, method="spearman")$p.value # Spearman
      f1<-pvalue(fisyat_test(x1 ~ z1, distribution="approximate")) # Fisher-Yates (van der Waerden) scores; simulated (finite-sample) p-value
      # alternative van der Waerden via kSamples (not used in this script):
      # qn.test(x1, z1, test="vdW", method="simulated", dist=FALSE, Nsim=1000)$qn[2]
      itg<-indepTest(cbind(x1, z1), duse, alpha=0.05)$global.statistic.pvalue
      # cat(d1, " ", hoeffd(x1,z1)$D[1,2], "\n")
      band2<-hfactor*glkerns(x1, z1, is.rand=FALSE)$bandwidth
      # band2<-hfactor*hopt(x1, y1, "gaussian", "gaussian", 0, 1)
      m.data<-data.frame(x1, z1)
      ff1<-independence_test(x1 ~ z1, distribution="approximate", data = m.data,
                             ytrafo = function(data) savage_trafo(data),
                             xtrafo = function(data) savage_trafo(data)) # Savage scores test
      # ff1<-savage_test(object, conf.int = FALSE, conf.level = 0.95, ...) # independence_test(x1 ~ y1, data = m.data, xtrafo = function(data) savage_trafo(data))
      j1<-pvalue(ff1)
      # t1<-T.n2(x1, z1, "kgaussian", band1)
      t2<-T.n2(x1, z1, "kgaussian", band2)
      CorMat[j,]<-cor(x1, z1)
      PowerMat[j,]<-c(b1, d1, e1, f1, j1, itg, t2) # store the test statistic values / p-values
      # cat(c(b1, d1, e1, t2), "\n")
    }
    aBn<-PowerMat[,1]
    bDn<-PowerMat[,2]
    cS<-PowerMat[,3]
    cW<-PowerMat[,4]
    cSav<-PowerMat[,5]
    cItg<-PowerMat[,6]
    cTn1<-PowerMat[,7]
    # cTn2<-PowerMat[,4]
    a<-length(which(aBn > BnCutoff)) # Bn: for n=40, a=0.95 the cut-off point (Mudholkar and Wilding, 2003) is 0.0649; for n=50 it is 0.0637, for n=30 it is 0.0665
    b<-length(which(bDn < Dna.a))    # Dn: count p-values below the nominal level
    c<-length(which(cS < Dna.a))     # Spearman
    d<-length(which(cW < Dna.a))     # Fisher-Yates / van der Waerden
    e<-length(which(cSav < Dna.a))   # Savage
    pitg<-length(which(cItg < Dna.a)) # indepTest
    j<-length(which(cTn1 > quse[1])) # Tn against its simulated critical value
    # c<-length(which(cTn1 > quse[1])) # que1
    # d<-length(which(cW > quse[1])) # que2
    TBDmatrix[i,]<-c(colMeans(CorMat), a, b, c, d, e, pitg, j)
  }
  TBDmatrix
}
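# Illustrative call (not part of the original simulation): one power-curve
# computation. Each row of the returned matrix holds the mean sample
# correlation followed by the rejection counts (out of 100) of the seven
# tests. The settings are hypothetical; 0.0637 is the Bn cut-off for n=50
# quoted above. Set the guard to TRUE to run.
if (FALSE) {
  q95<-PowerGraphs1(50, 0.95, 1.5, F1)
  pw<-PowerGraphs(50, 10, 1.5, F1, F2, q95, 0.0637, 0.05)
}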
# Simulation: averages the power curves of It2 replications of PowerGraphs()
# and plots one line per test
Simulation<-function(Sample, iterations, It2, hfactor, Tn.Conf, cutB.n, CutD.n)
{
  matrixAllBn<-matrix(nrow=iterations, ncol=It2)
  matrixAllDn<-matrix(nrow=iterations, ncol=It2)
  matrixAllSn<-matrix(nrow=iterations, ncol=It2)
  matrixAllWn<-matrix(nrow=iterations, ncol=It2)
  matrixAllSavn<-matrix(nrow=iterations, ncol=It2)
  matrixAllitg<-matrix(nrow=iterations, ncol=It2)
  matrixAllTn1<-matrix(nrow=iterations, ncol=It2)
  # matrixAllTn2<-matrix(nrow=iterations, ncol=It2)
  matrixCor<-matrix(nrow=iterations, ncol=It2)
  quse<-PowerGraphs1(Sample, Tn.Conf, hfactor, F1)
  cat("for n=", Sample, "the ", Tn.Conf, " sign. level value is ", quse[1])
  for(i in 1:It2)
  {
    cat("\ni= ", i)
    arg1<-PowerGraphs(Sample, iterations, hfactor, F1, F2, quse, cutB.n, CutD.n)
    matrixCor[,i]<-arg1[,1]
    matrixAllBn[,i]<-arg1[,2]
    matrixAllDn[,i]<-arg1[,3]
    matrixAllWn[,i]<-arg1[,4]
    matrixAllSn[,i]<-arg1[,5]
    matrixAllSavn[,i]<-arg1[,6]
    matrixAllitg[,i]<-arg1[,7]
    matrixAllTn1[,i]<-arg1[,8]
    # matrixAllTn2[,i]<-arg1[,5]
  }
  xout<-rowMeans(matrixCor) # seq(1,100, length=iterations)/100
  plot(xout, rowMeans(matrixAllBn), type="l", ylim=c(0,100), ylab="Power", xlab="Correlation")
  lines(xout, rowMeans(matrixAllDn), lty=2, col=1)
  lines(xout, rowMeans(matrixAllWn), lty=3, col=1)
  lines(xout, rowMeans(matrixAllSn), lty=4, col=1)
  lines(xout, rowMeans(matrixAllSavn), lty=5, col=1)
  lines(xout, rowMeans(matrixAllitg), lty=6, col=1)
  lines(xout, rowMeans(matrixAllTn1), lty=7, col=1)
  # write.csv(matrixAllBn, file="matrixAllBn.csv")
  # write.csv(matrixAllDn, file="matrixAllDn.csv")
  # write.csv(matrixAllTn1, file="matrixAllTn1.csv")
  # lines(xout, rowMeans(matrixAllTn2), lty=2, col=5)
  legend("bottomright", c(expression(B[n]), expression(D[n]), expression(W[n]),
         expression(S[n]), expression(J[n]), expression(G[n]), expression(T[n])),
         lty=c(1,2,3,4,5,6,7), col=c(1,1,1,1,1,1,1), merge=TRUE, ncol=3) # , bg='gray90')
  # initial.dir<-"C:\\Users\\dimitrios.bagkavos\\Documents\\R\\Independence\\Test small samples\\n=40"
  # setwd(initial.dir)
  # dev.copy2eps("n=40hfactor=0.69.eps", width=6, height=6)
}

set.seed(155)
iterations<-100 # number of correlation points in [0,1]
It2<-40 # average the results of It2 replications
# Cut-off values for B_n by sample size:
# n=20, a = 0.90, 0.95, 0.99: 0.0557, 0.0704, 0.1072 respectively
# n=30, a = 0.90, 0.95, 0.99: 0.0528, 0.0665, 0.1006 respectively
# n=40, a = 0.90, 0.95, 0.99: 0.0516, 0.0649, 0.0979 respectively
# n=80, a = 0.90, 0.95, 0.99: 0.0493, 0.0617, 0.0923 respectively
# a=90%
SampleSi<-60
Simulation(SampleSi, iterations, It2, 1.5, 0.95, 0.0627, 0.05) # 0.0649 # 0.0354 instead of 0.05 p-value
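# To reproduce other figures of the paper, adjust the sample size, bandwidth
# factor and Bn cut-off accordingly (see the cut-off table above); e.g. for
# n=40 at the 95% level (illustrative, guarded so it does not run on sourcing):
if (FALSE) {
  SampleSi<-40
  Simulation(SampleSi, iterations, It2, 1.5, 0.95, 0.0649, 0.05)
}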